This file belongs to the repository: https://github.com/drisso/awst_analysis.
The code is released with license GPL v3.0.
# awst analysis setup.
# Install BiocManager only when it is missing, then install the awst package
# from GitHub through BiocManager.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("drisso/awst")
#BiocManager::install("GIS-SP-Group/RCA")
#BiocManager::install("hgu133plus2.db")
# NOTE(review): wipes every object in the global environment before the run.
rm(list = ls())
library(steFunctions)
library(dendextend)
library(clues)
# NOTE(review): machine-specific working directory; all relative paths used
# by load()/save() further down resolve against it.
setwd("~/Dropbox/AWST/mixology/")
jobName <- "mixoloy20200925"
# Shared helper functions are fetched from the repository at run time.
source(url("https://raw.githubusercontent.com/drisso/awst_analysis/master/functions.R"))
#####
# Plot-export settings: switch for writing figures plus PNG geometry values.
save_plots <- FALSE
png_height_large <- 500
png_width_large <- 2100
width_png <- 700
png_width_small <- width_png
png_height_small <- 700
png_res <- 1 / 300
####
# Empty 170 x 8 accumulator with named columns, filled with NA.
results <- matrix(
  NA,
  nrow = 170,
  ncol = 8,
  dimnames = list(
    NULL,
    c("where", "what", "eca", "ecp", "ari", "G", "noOfClust_Th", "noOfClust_Est")
  )
)
# Counter initialised to zero (presumably the last filled row of `results`;
# TODO confirm against the code that fills it).
k <- 0
# Cluster with RaceID and attach the result to a copy of the global
# `annotation.df`.
#
# eData:         expression table; converted with as.data.frame() for SCseq().
# is_normalized: when FALSE, eData is first replaced by log2(eData + 1).
# Returns:       annotation.df plus the columns clustering, umap.1, umap.2,
#                tsne.1 and tsne.2 (embedding coordinates are scale()d).
get_RaceID <- function(eData, is_normalized = FALSE) {
  require(RaceID)
  if (!is_normalized) {
    eData <- log2(as.matrix(eData) + 1)
  }

  race <- SCseq(as.data.frame(eData))
  # Fill the slots by hand so that the input values are used as they are:
  # every gene is a feature and every cell gets a count weight of 1.
  race@ndata <- race@expdata
  race@genes <- rownames(race@ndata)
  race@counts <- rep(1, ncol(race@ndata))
  names(race@counts) <- colnames(race@ndata)
  race@cluster$features <- race@genes

  race <- compdist(race, metric = "pearson", FSelect = FALSE, knn = NULL)
  race <- clustexp(
    race,
    sat = TRUE, samp = NULL, cln = NULL, clustnr = 30,
    bootnr = 50, rseed = 17000, FUNcluster = "kmedoids"
  )

  # Work on a local copy; the global annotation.df is left untouched.
  annotated <- annotation.df
  annotated$clustering <- as.factor(race@cluster$kpart)

  race <- compumap(race)
  umap_xy <- scale(race@umap)
  annotated$umap.1 <- umap_xy[, 1]
  annotated$umap.2 <- umap_xy[, 2]

  race <- comptsne(race)
  tsne_xy <- scale(race@tsne)
  annotated$tsne.1 <- tsne_xy[, 1]
  annotated$tsne.2 <- tsne_xy[, 2]

  annotated
}
# Cluster with RCA (GlobalPanel features, hierarchical clustering) and attach
# the result to a copy of the global `annotation.df`.
#
# eData:         expression matrix with genes in rows; row names are rewritten
#                into the "XXXX_<name>..." form before being handed to RCA.
# is_normalized: when FALSE, log2(eData + 1) is used as the log-expression;
#                when TRUE, eData itself is used.
# Returns:       annotation.df plus the columns clustering, pca.1 and pca.2.
#
# Reads the globals `prefix` (selects the gene-name handling) and
# `annotation.df`.
get_RCA <- function(eData, is_normalized = FALSE) {
  if (prefix == "sc_10x_5cl") {
    # Fix: take the names from eData itself. The previous code indexed the
    # global `ddata` with positions computed on eData, which mislabels genes
    # whenever the two matrices do not share the same row layout.
    wwhich <- grep("ENSG00", rownames(eData))
    rownames(eData)[wwhich] <- gsub("ENSG0", "", rownames(eData)[wwhich])
    rownames(eData) <- paste("XXXX", rownames(eData), sep = "_")
  } else {
    library(biomaRt)
    mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
    # Fix: query biomaRt for the genes of eData (was the global `ddata`).
    G_list <- getBM(
      filters = "ensembl_gene_id",
      attributes = c("external_gene_name", "ensembl_gene_id", "external_gene_source"),
      values = rownames(eData),
      mart = mart
    )
    G_list <- G_list[G_list$external_gene_source == "HGNC Symbol", ]
    G_list$format_name <- paste(
      "XXXX", G_list$external_gene_name, G_list$ensembl_gene_id,
      sep = "_"
    )
    rownames(G_list) <- G_list$ensembl_gene_id
    # Keep only genes that have an annotation, in matching order.
    both <- intersect(rownames(G_list), rownames(eData))
    eData <- eData[both, ]
    G_list <- G_list[both, ]
    rownames(eData) <- G_list$format_name
  }

  require(SingleCellExperiment)
  # Only the logcounts assay is read below; counts is filled for completeness.
  if (!is_normalized) {
    sce <- SingleCellExperiment(
      assays = list(
        counts = eData,
        logcounts = log2(eData + 1)
      ),
      colData = annotation.df
    )
  } else {
    sce <- SingleCellExperiment(
      assays = list(
        counts = 2^eData,
        logcounts = eData
      ),
      colData = annotation.df
    )
  }

  require(preprocessCore)
  require(flashClust)
  require(RCA)
  data_obj <- dataConstruct(as.matrix(logcounts(sce)))
  # Mark every gene as passing the filter and reuse the input values as the
  # transformed data, i.e. no extra filtering or transformation is applied here.
  data_obj$geneFilter <- rep(TRUE, nrow(logcounts(sce)))
  data_obj$fpkm_transformed <- data_obj$fpkm_raw
  data_obj <- featureConstruct(data_obj, method = "GlobalPanel")
  set.seed(20742579)
  data_obj <- cellClust(
    data_obj,
    method = "hclust", deepSplit_wgcna = 1,
    min_group_Size_wgcna = 5
  )
  annotation.df$clustering <- factor(data_obj$group_labels_color$groupLabel)

  # Scaled principal components of the (scaled) clustering features.
  pr <- scale(prcomp(t(scale(data_obj$fpkm_for_clust)))$x)
  annotation.df$pca.1 <- pr[, 1]
  annotation.df$pca.2 <- pr[, 2]
  return(annotation.df)
}
# Cluster with SC3, using SC3's own estimate of the number of clusters, and
# attach the result to a copy of the global `annotation.df`.
#
# eData:         expression matrix with genes in rows and cells in columns.
# is_normalized: when FALSE, log2(eData + 1) is stored as logcounts; when
#                TRUE, eData is stored as logcounts and 2^eData as counts.
# Returns:       annotation.df plus the column clustering.
get_SC3 <- function(eData, is_normalized = FALSE) {
  require(SingleCellExperiment)
  require(SC3)
  if (!is_normalized) {
    sce <- SingleCellExperiment(
      assays = list(
        counts = eData,
        logcounts = log2(eData + 1)
      ),
      colData = annotation.df
    )
  } else {
    sce <- SingleCellExperiment(
      assays = list(
        counts = 2^eData,
        logcounts = eData
      ),
      colData = annotation.df
    )
  }
  rowData(sce)$feature_symbol <- rownames(sce)
  sce <- sc3_estimate_k(sce)
  k_est <- sce@metadata$sc3$k_estimation
  sce <- sc3(
    sce,
    ks = k_est, biology = FALSE, n_cores = 2, k_estimator = FALSE,
    rand_seed = 2333333, gene_filter = FALSE
  )
  # SC3 stores its labels in a colData column named "sc3_<k>_clusters"; fetch
  # it by name with [[ ]] rather than building code with eval(parse()).
  cluster_col <- paste0("sc3_", k_est, "_clusters")
  annotation.df$clustering <- colData(sce)[[cluster_col]]
  return(annotation.df)
}
# Cluster with Seurat and attach the clustering plus scaled PCA, UMAP and
# t-SNE coordinates to a copy of the global `annotation.df`.
#
# eData:         expression matrix with genes in rows and cells in columns.
# resolution:    passed on to FindClusters().
# is_normalized: when TRUE, eData is written directly into the RNA assay's
#                scale.data slot; otherwise the data are log-normalised and
#                scaled by Seurat.
# Returns:       annotation.df plus clustering, pca.1/2, umap.1/2, tsne.1/2.
#
# All genes are used as features in every step (no variable-gene selection).
get_Seurat <- function(eData, resolution = 0.6, is_normalized = FALSE) {
  require(Seurat)
  all_genes <- rownames(eData)
  seu <- CreateSeuratObject(eData, project = prefix)

  if (is_normalized) {
    seu@assays$RNA@scale.data <- eData
  } else {
    seu <- NormalizeData(
      seu,
      normalization.method = "LogNormalize", scale.factor = 10000,
      verbose = FALSE
    )
    seu <- ScaleData(seu, features = all_genes, verbose = FALSE)
  }

  seu <- RunPCA(seu, features = all_genes, verbose = FALSE)
  seu <- FindNeighbors(seu, verbose = FALSE)
  seu <- FindClusters(
    object = seu, resolution = resolution,
    algorithm = 1, n.start = 100, graph.name = NULL,
    n.iter = 10, random.seed = 0, verbose = FALSE
  )

  # Work on a local copy; the global annotation.df is left untouched.
  annotated <- annotation.df
  annotated$clustering <- as.factor(seu@active.ident)

  pca_xy <- scale(seu@reductions$pca@cell.embeddings)
  annotated$pca.1 <- pca_xy[, 1]
  annotated$pca.2 <- pca_xy[, 2]

  seu <- RunUMAP(seu, features = all_genes, verbose = FALSE)
  umap_xy <- scale(seu@reductions$umap@cell.embeddings)
  annotated$umap.1 <- umap_xy[, 1]
  annotated$umap.2 <- umap_xy[, 2]

  seu <- RunTSNE(seu, features = all_genes)
  tsne_xy <- scale(seu@reductions$tsne@cell.embeddings)
  annotated$tsne.1 <- tsne_xy[, 1]
  annotated$tsne.2 <- tsne_xy[, 2]

  annotated
}
# Cluster with clusterExperiment's RSEC on a 5-dimensional PCA and attach the
# result to a copy of the global `annotation.df`.
#
# eData:         expression matrix with genes in rows and cells in columns.
# is_normalized: when FALSE, log2(eData + 1) is stored as logcounts; when
#                TRUE, eData is stored as logcounts and 2^eData as counts.
# Returns:       annotation.df plus the column clustering; negative labels
#                from primaryCluster() are turned into NA.
get_clusterExperiment <- function(eData, is_normalized = FALSE) {
  require(clusterExperiment)
  if (!is_normalized) {
    sce <- SingleCellExperiment(
      assays = list(
        counts = eData,
        logcounts = log2(eData + 1)
      ),
      colData = annotation.df
    )
  } else {
    sce <- SingleCellExperiment(
      assays = list(
        counts = 2^eData,
        logcounts = eData
      ),
      colData = annotation.df
    )
  }
  # Fix: `n = 5` belongs to prcomp_irlba() (number of components), not to the
  # assay accessor, and a single assay is fetched with assay(). The matrix is
  # transposed because reducedDim<-() needs one row per cell.
  reducedDim(sce, "PCA") <-
    irlba::prcomp_irlba(t(assay(sce, "logcounts")), n = 5)$x
  se <- RSEC(
    sce,
    whichAssay = "logcounts",
    minSizes = 5, reduceMethod = "PCA", nReducedDims = 5, ncores = 4,
    random.seed = 176201,
    dendroReduce = "PCA", dendroNDims = 5,
    mergeMethod = "adjP", mergeDEMethod = "limma",
    mergeCutoff = 0.1, mergeLogFCcutoff = 1
  )
  tmp <- primaryCluster(se)
  tmp[tmp < 0] <- NA
  annotation.df$clustering <- tmp
  return(annotation.df)
}
# Download the sc_10x_5cl metadata and count tables from the sc_mixology
# repository, align them, and cache them in "<prefix>_counts.RData".
# Both the remote connection and the text connection are closed after use
# (previously they were left open).
con <- gzcon(url("https://github.com/LuyiTian/sc_mixology/blob/master/data/csv/sc_10x_5cl.metadata.csv.gz?raw=true"))
txt_con <- textConnection(readLines(con))
close(con)
annotation.df <- read.csv(txt_con)
close(txt_con)
# One colour per level of cell_line (five colours are supplied).
annotation.df$cell.col <- factor(annotation.df$cell_line)
levels(annotation.df$cell.col) <- c("gold", "red", "blue", "magenta", "green3")
annotation.df$cell.col <- as.character(annotation.df$cell.col)
# NOTE(review): cell_line is overwritten from the `mix` column right after the
# colours were derived from it; confirm this is intended for this dataset.
annotation.df$cell_line <- paste0("mix", annotation.df$mix)
con <- gzcon(url("https://github.com/LuyiTian/sc_mixology/blob/master/data/csv/sc_10x_5cl.count.csv.gz?raw=true"))
txt_con <- textConnection(readLines(con))
close(con)
ddata <- read.csv(txt_con)
close(txt_con)
# Keep only the cells present in both tables, in the same order.
# NOTE(review): relies on read.csv() having set row names on annotation.df
# that match the column names of ddata; verify against the CSV headers.
both <- intersect(colnames(ddata), rownames(annotation.df))
ddata <- ddata[, both]
annotation.df <- annotation.df[both, ]
prefix <- "sc_10x_5cl"
save(ddata, annotation.df, prefix, file = paste0(prefix, "_counts.RData"))
# Normalise the counts, apply the AWST transformation, and derive the
# reference clustering and embeddings (hclust, PCA, t-SNE, UMAP) from it.
require(awst)
require(EDASeq)
require(Rtsne)
require(umap)
require(SingleCellExperiment)
####
#load(paste0(prefix, "_counts.RData"))
#load(paste0(prefix, "_expression.RData"))
####
# Number of non-zero entries per column of ddata, with its five-number summary.
no_of_detected_gene_per_sample <- colSums(ddata > 0)
fivenum(no_of_detected_gene_per_sample)
# Full-quantile between-lane normalisation; ddata is overwritten and values
# are not rounded.
ddata <- EDASeq::betweenLaneNormalization(as.matrix(ddata), which = "full", round = FALSE)
# apply the AWS-transformation
# Sanity checks first: how many rows and how many columns have a positive total.
tmp <- rowSums(ddata)
sum(tmp > 0)
tmp <- colSums(ddata)
sum(tmp > 0)
exprData <- awst(ddata, poscount = TRUE, full_quantile = TRUE)
# Count rows / columns of the transformed data that contain NA.
sum(is.na(rowSums(exprData)))
sum(is.na(colSums(exprData)))
save(exprData, prefix, file = paste0(prefix, "_expression.RData"))
#dim(exprData <- gene_filter(exprData))
#write.table(exprData, file = paste0(prefix, "_expression.tsv"), sep = "\t")
nrow_exprData <- nrow(exprData)
ncol_exprData <- ncol(exprData)
# Distances between the rows of exprData (presumably one row per cell, since
# t(exprData) is what gets passed to the genes-by-cells wrappers; TODO confirm).
ddist <- dist(exprData)
save(ddist, nrow_exprData, ncol_exprData, prefix, file = paste0(prefix, "_expression_dist.RData"))
hhc <- hclust(ddist, method = "ward.D2")
# calinski() is not defined in this file (presumably from the sourced functions.R).
aCalinski <- calinski(hhc)
pprcomp <- prcomp(exprData) # Run PC analysis
# Keep only the first five components to limit the size of the saved object.
pprcomp$x <- pprcomp$x[, 1:5]
pprcomp$rotation <- pprcomp$rotation[, 1:5]
# Same seed before each stochastic embedding so the results are reproducible.
set.seed(2020)
ans_Rtsne <- Rtsne(exprData, pca = FALSE) # Run TSNE
set.seed(2020)
ans_umap <- umap(exprData) # Run Umap
# Run every clustering wrapper twice: on the normalised counts (ddata, "raw")
# and on the AWST-transformed data (t(exprData), flagged as already
# normalised). For wrappers that add several columns, the columns already in
# annotation.df are dropped so only the newly added ones are kept.
# The "user system elapsed" comments are recorded system.time() outputs.
require(SingleCellExperiment)
rm(tmp)
# --- RCA ---
tmp <- get_RCA(ddata)
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RCA_raw <- tmp[, -wwhich]
# user system elapsed
rm(tmp)
system.time({tmp <- get_RCA(t(exprData), is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RCA_awst <- tmp[, -wwhich]
# user system elapsed
# --- SC3 (only the colour and clustering columns are kept) ---
system.time({ans_SC3_raw <- get_SC3(as.matrix(ddata))[, c("cell.col", "clustering")]})
# user system elapsed
# 130.476 9.860 2821.177 sc_10x_5cl
# 12.496 2.868 474.091 sc_10x
# 2.284 0.892 55.425 RNAmix_celseq2
system.time({ans_SC3_awst <- get_SC3(t(exprData), is_normalized = TRUE)[, c("cell.col", "clustering")]})
# user system elapsed
# 150.964 13.932 3057.959 sc_10x_5cl
# 12.664 2.572 466.772 sc_10x
# 2.540 0.888 57.347 RNAmix_celseq2
# --- clusterExperiment (RSEC) ---
system.time({ans_clusterExp_raw <- get_clusterExperiment(ddata, is_normalized = FALSE)[, c("cell.col", "clustering")]})
system.time({ans_clusterExp_awst <- get_clusterExperiment(t(exprData), is_normalized = TRUE)[, c("cell.col", "clustering")]})
rm(tmp)
# --- Seurat, default resolution 0.6 ("LoR") ---
system.time({tmp <- get_Seurat(ddata, is_normalized = FALSE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_raw_LoR <- tmp[, -wwhich]
# user system elapsed
# 14.412 0.432 14.902 RNAmix_celseq2
# 36.192 0.832 37.031 sc_10x
rm(tmp)
system.time({tmp <- get_Seurat(t(exprData), is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_awst_LoR <- tmp[, -wwhich]
# user system elapsed
# 11.464 0.184 11.651 RNAmix_celseq2
# 33.304 1.124 34.431 sc_10x
rm(tmp)
# --- Seurat, resolution 1.6 ("HiR") ---
system.time({tmp <- get_Seurat(ddata, is_normalized = FALSE, resolution = 1.6)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_raw_HiR <- tmp[, -wwhich]
# user system elapsed
# 11.560 0.152 11.713 RNAmix_celseq2
# 35.472 0.792 36.280 sc_10x
rm(tmp)
system.time({tmp <- get_Seurat(t(exprData), is_normalized = TRUE, resolution = 1.6)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_awst_HiR <- tmp[, -wwhich]
# user system elapsed
# 11.692 0.140 11.836 RNAmix_celseq2
# 34.184 1.232 35.418 sc_10x
rm(tmp)
# --- RaceID ---
system.time({tmp <- get_RaceID(ddata, is_normalized = FALSE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RaceID_raw <- tmp[, -wwhich]
# user system elapsed
# 7.004 0.116 7.131 RNAmix_celseq2
# 41.932 0.532 42.429 sc_10x
rm(tmp)
# The AWST values are exponentiated (2^x) before being passed as normalised input.
system.time({tmp <- get_RaceID(2^t(exprData), is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RaceID_awst <- tmp[, -wwhich]
# user system elapsed
# 5.784 0.092 5.866 RNAmix_celseq2
# 47.252 0.560 47.759 sc_10x
### VST
# Compute the DESeq2 variance-stabilising transformation once and cache it in
# "<prefix>_VST.RData"; later runs load the cached vstData instead.
if(!file.exists(paste0(prefix, "_VST.RData"))) {
# Reloads ddata and annotation.df as saved in the counts file, replacing the
# normalised ddata currently in memory.
load(paste0(prefix, "_counts.RData"))
require(DESeq2)
dds <- DESeqDataSetFromMatrix(countData = as.matrix(ddata),
colData = annotation.df,
design = ~1)
# vvst <- vst(dds, blind=FALSE)
#Error in vst(dds, blind = FALSE) :
# less than 'nsub' rows with mean normalized count > 5,
# it is recommended to use varianceStabilizingTransformation directly
system.time({vvst <- varianceStabilizingTransformation(dds, blind=FALSE)})
# user system elapsed
#1426.620 2.388 1430.940 sc_10x
# 157.372 0.020 157.395 RNAmix
vstData <- assay(vvst)
save(vstData, file = paste0(prefix, "_VST.RData"))
} else load(paste0(prefix, "_VST.RData"))
rm(tmp)
# Seurat on the VST data, default resolution ("LoR"); columns already in
# annotation.df are dropped from the result.
system.time({tmp <- get_Seurat(vstData, is_normalized = TRUE)})
# user system elapsed
#267.628 5.892 327.423 sc_10x_5cl
# 10.652 0.112 10.765 RNAmix_celseq2
# 33.912 0.584 34.494 sc_10x
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_VST_LoR <- tmp[, -wwhich]
rm(tmp)
# Seurat on the VST data, resolution 1.6 ("HiR").
system.time({tmp <- get_Seurat(vstData, is_normalized = TRUE, resolution = 1.6)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_VST_HiR <- tmp[, -wwhich]
# user system elapsed
#286.520 5.096 321.105 sc_10x_5cl
# 11.080 0.128 11.212 RNAmix_celseq2
# 34.228 0.620 34.858 sc_10x
rm(tmp)
# RaceID on the exponentiated (2^x) VST data.
system.time({tmp <- get_RaceID(2^vstData, is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RaceID_VST <- tmp[, -wwhich]
# user system elapsed
#2743.628 4.196 2777.134 sc_10x_5cl
# 8.992 0.096 9.055 RNAmix_celseq2
# 51.904 0.744 52.610 sc_10x
# SC3 on the VST data; only the colour and clustering columns are kept.
system.time({
  ans_SC3_VST <- get_SC3(vstData, is_normalized = TRUE)[, c("cell.col", "clustering")]
})
# user system elapsed
# 145.884 10.376 2626.021 sc_10x_5cl
# 13.448 2.224 459.802 sc_10x
# 2.460 0.836 53.050 RNAmix_celseq2
# clusterExperiment on the VST data. The result is stored under the lower-case
# name `ans_clusterExp_vst`, which is the name the RSEC post-processing and
# the final save() read; it was previously assigned as `ans_clusterExp_VST`,
# leaving that object undefined.
system.time({
  ans_clusterExp_vst <- get_clusterExperiment(vstData, is_normalized = TRUE)[, c("cell.col", "clustering")]
})
# RCA on the VST data; columns already in annotation.df are dropped so that
# only the columns added by get_RCA() remain.
rm(tmp)
system.time({tmp <- get_RCA(vstData, is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RCA_vst <- tmp[, -wwhich]
# user system elapsed
# 8.208 0.000 40.342
# Compute the scry Pearson null residuals once and cache them in
# "<prefix>_scry.RData"; later runs load the cached scryData_pearson instead.
if(!file.exists(paste0(prefix, "_scry.RData"))) {
# Reloads ddata and annotation.df as saved in the counts file.
load(paste0(prefix, "_counts.RData"))
library(scry)
scryData_pearson <- nullResiduals(as.matrix(ddata), type = "pearson")
save(scryData_pearson, file = paste0(prefix, "_scry.RData"))
} else load(paste0(prefix, "_scry.RData"))
rm(tmp)
# Seurat on the residuals, default resolution ("LoR"); columns already in
# annotation.df are dropped from the result.
system.time({tmp <- get_Seurat(scryData_pearson, is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_scry_LoR <- tmp[, -wwhich]
# user system elapsed
#284.504 8.032 356.628 sc_10x_5cl
# 16.140 0.172 16.313 RNAmix_celseq2
# 34.216 0.724 34.948 sc_10x
rm(tmp)
# Seurat on the residuals, resolution 1.6 ("HiR").
system.time({tmp <- get_Seurat(scryData_pearson, is_normalized = TRUE, resolution = 1.6)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_Seurat_scry_HiR <- tmp[, -wwhich]
# user system elapsed
#293.972 2.968 320.115 sc_10x_5cl
# 15.428 0.192 15.620 RNAmix_celseq2
# 34.496 0.908 35.406 sc_10x
rm(tmp)
# RaceID on the exponentiated (2^x) residuals.
system.time({tmp <- get_RaceID(2^scryData_pearson, is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RaceID_scry <- tmp[, -wwhich]
# user system elapsed
#2387.520 2.580 2407.494 sc_10x_5cl
# 6.892 0.100 6.976 RNAmix_celseq2
# 59.864 0.528 60.362 sc_10x
rm(tmp)
# RCA on the residuals.
system.time({tmp <- get_RCA(scryData_pearson, is_normalized = TRUE)})
wwhich <- which(colnames(tmp) %in% colnames(annotation.df))
ans_RCA_scry <- tmp[, -wwhich]
# user system elapsed
#2387.520 2.580 2407.494 sc_10x_5cl
# 7.920 0.000 309.245 sc_10x
# SC3 on the scry residuals; only the colour and clustering columns are kept.
system.time({
  ans_SC3_scry <- get_SC3(scryData_pearson, is_normalized = TRUE)[, c("cell.col", "clustering")]
})
# user system elapsed
# 143.712 11.704 4087.033 sc_10x_5cl
# 14.312 3.232 1097.622 sc_10x
# 2.484 0.848 76.398 RNAmix_celseq2
# clusterExperiment on the scry residuals. The input is `scryData_pearson`,
# the only residual object created or loaded by this script; the previous
# call referenced `scryData`, which is never defined.
system.time({
  ans_clusterExp_scry <- get_clusterExperiment(scryData_pearson, is_normalized = TRUE)[, c("cell.col", "clustering")]
})
# If a separately produced "<prefix>_rsec_working.RData" exists, load it and
# rebuild the four clusterExperiment results from the loaded objects.
# For each object: select the annotation.df rows named in its "rownames"
# attribute, take `clustering_res` from its "listData" attribute, turn negative
# labels into NA, convert to a factor, and keep the colour and clustering columns.
# NOTE(review): the loaded objects are read through attr() rather than normal
# accessors; presumably they are S4 data-frame-like objects - confirm.
if(file.exists(paste0(prefix, "_rsec_working.RData"))) {
load(paste0(prefix, "_rsec_working.RData"))
rm(tmp)
# raw counts
tmp <- annotation.df[attr(ans_clusterExp_raw, "rownames"),]
tmp$clustering <- attr(ans_clusterExp_raw, "listData")$clustering_res
tmp$clustering[tmp$clustering < 0] <- NA
tmp$clustering <- factor(tmp$clustering)
ans_clusterExp_raw <- tmp[, c("cell.col", "clustering")]
rm(tmp)
# AWST
tmp <- annotation.df[attr(ans_clusterExp_awst, "rownames"),]
tmp$clustering <- attr(ans_clusterExp_awst, "listData")$clustering_res
tmp$clustering[tmp$clustering < 0] <- NA
tmp$clustering <- factor(tmp$clustering)
ans_clusterExp_awst <- tmp[, c("cell.col", "clustering")]
rm(tmp)
# scry residuals
tmp <- annotation.df[attr(ans_clusterExp_scry, "rownames"),]
tmp$clustering <- attr(ans_clusterExp_scry, "listData")$clustering_res
tmp$clustering[tmp$clustering < 0] <- NA
tmp$clustering <- factor(tmp$clustering)
ans_clusterExp_scry <- tmp[, c("cell.col", "clustering")]
# VST (tmp is reused without being removed first)
tmp <- annotation.df[attr(ans_clusterExp_vst, "rownames"),]
tmp$clustering <- attr(ans_clusterExp_vst, "listData")$clustering_res
tmp$clustering[tmp$clustering < 0] <- NA
tmp$clustering <- factor(tmp$clustering)
ans_clusterExp_vst <- tmp[, c("cell.col", "clustering")]
}
# Write the reference clustering, the embeddings and every method's result to
# "<prefix>_expression_working.RData". Objects are listed by name, in the
# order in which they are stored.
save(
  list = c(
    "hhc", "nrow_exprData", "ncol_exprData", "annotation.df", "aCalinski",
    "pprcomp", "ans_Rtsne", "ans_umap", "prefix",
    "ans_RaceID_awst", "ans_RaceID_raw",
    "ans_RaceID_scry",
    "ans_RaceID_VST",
    "ans_RCA_raw", "ans_RCA_awst",
    "ans_RCA_vst", "ans_RCA_scry",
    "ans_SC3_raw", "ans_SC3_awst",
    "ans_SC3_VST", "ans_SC3_scry",
    "ans_Seurat_raw_LoR", "ans_Seurat_raw_HiR",
    "ans_Seurat_awst_LoR", "ans_Seurat_awst_HiR",
    "ans_Seurat_VST_LoR", "ans_Seurat_VST_HiR",
    "ans_Seurat_scry_LoR", "ans_Seurat_scry_HiR",
    "ans_clusterExp_raw", "ans_clusterExp_awst",
    "ans_clusterExp_scry", "ans_clusterExp_vst"
  ),
  file = paste0(prefix, "_expression_working.RData")
)
## cluster accuracy (eca): 0.9861
## cluster purity (ecp): 0.9806
## adjusted Rand's index (ari): 0.9632
## G index (geometric average of eca, ecp, and ari): 0.9766
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 5
## cluster accuracy (eca): 0.9767
## cluster purity (ecp): 0.8551
## adjusted Rand's index (ari): 0.5137
## G index (geometric average of eca, ecp, and ari): 0.7542
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 12
## cluster accuracy (eca): 0.9853
## cluster purity (ecp): 0.8087
## adjusted Rand's index (ari): 0.4089
## G index (geometric average of eca, ecp, and ari): 0.6881
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 17
## cluster accuracy (eca): 0.961
## cluster purity (ecp): 0.9081
## adjusted Rand's index (ari): 0.6685
## G index (geometric average of eca, ecp, and ari): 0.8356
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.8172
## cluster purity (ecp): 0.9366
## adjusted Rand's index (ari): 0.1493
## G index (geometric average of eca, ecp, and ari): 0.4852
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 2
## cluster accuracy (eca): 0.9161
## cluster purity (ecp): 0.8042
## adjusted Rand's index (ari): 0.4973
## G index (geometric average of eca, ecp, and ari): 0.7155
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 19
## cluster accuracy (eca): 0.9044
## cluster purity (ecp): 0.7996
## adjusted Rand's index (ari): 0.4613
## G index (geometric average of eca, ecp, and ari): 0.6936
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 20
## cluster accuracy (eca): 0.9161
## cluster purity (ecp): 0.8042
## adjusted Rand's index (ari): 0.4973
## G index (geometric average of eca, ecp, and ari): 0.7155
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 19
## cluster accuracy (eca): 0.928
## cluster purity (ecp): 0.5664
## adjusted Rand's index (ari): 0.1112
## G index (geometric average of eca, ecp, and ari): 0.3881
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 109
## cluster accuracy (eca): 0.9862
## cluster purity (ecp): 0.8837
## adjusted Rand's index (ari): 0.6
## G index (geometric average of eca, ecp, and ari): 0.8056
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 12
## cluster accuracy (eca): 0.9845
## cluster purity (ecp): 0.8366
## adjusted Rand's index (ari): 0.4787
## G index (geometric average of eca, ecp, and ari): 0.7333
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 15
## cluster accuracy (eca): 0.9869
## cluster purity (ecp): 0.8854
## adjusted Rand's index (ari): 0.5757
## G index (geometric average of eca, ecp, and ari): 0.7953
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 12
## cluster accuracy (eca): 0.9851
## cluster purity (ecp): 0.8765
## adjusted Rand's index (ari): 0.6074
## G index (geometric average of eca, ecp, and ari): 0.8064
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 12
## cluster accuracy (eca): 0.9837
## cluster purity (ecp): 0.7908
## adjusted Rand's index (ari): 0.3455
## G index (geometric average of eca, ecp, and ari): 0.6453
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 19
## cluster accuracy (eca): 0.9848
## cluster purity (ecp): 0.7871
## adjusted Rand's index (ari): 0.3394
## G index (geometric average of eca, ecp, and ari): 0.6408
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 20
## cluster accuracy (eca): 0.9846
## cluster purity (ecp): 0.7921
## adjusted Rand's index (ari): 0.3488
## G index (geometric average of eca, ecp, and ari): 0.648
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 19
## cluster accuracy (eca): 0.9833
## cluster purity (ecp): 0.8208
## adjusted Rand's index (ari): 0.4275
## G index (geometric average of eca, ecp, and ari): 0.7014
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 16
## cluster accuracy (eca): 0.9811
## cluster purity (ecp): 0.8702
## adjusted Rand's index (ari): 0.662
## G index (geometric average of eca, ecp, and ari): 0.8268
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 16
## cluster accuracy (eca): 0.9861
## cluster purity (ecp): 0.8265
## adjusted Rand's index (ari): 0.4292
## G index (geometric average of eca, ecp, and ari): 0.7046
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 20
## cluster accuracy (eca): 0.9843
## cluster purity (ecp): 0.8792
## adjusted Rand's index (ari): 0.6743
## G index (geometric average of eca, ecp, and ari): 0.8356
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 15
## cluster accuracy (eca): 0.9685
## cluster purity (ecp): 0.627
## adjusted Rand's index (ari): 0.1299
## G index (geometric average of eca, ecp, and ari): 0.4289
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 76
## cluster accuracy (eca): 0.9809
## cluster purity (ecp): 0.7198
## adjusted Rand's index (ari): 0.268
## G index (geometric average of eca, ecp, and ari): 0.5741
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 33
## cluster accuracy (eca): 0.9881
## cluster purity (ecp): 0.8742
## adjusted Rand's index (ari): 0.5816
## G index (geometric average of eca, ecp, and ari): 0.795
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 14
## cluster accuracy (eca): 0.9722
## cluster purity (ecp): 0.8234
## adjusted Rand's index (ari): 0.4267
## G index (geometric average of eca, ecp, and ari): 0.699
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 15
## cluster accuracy (eca): 0.9737
## cluster purity (ecp): 0.9243
## adjusted Rand's index (ari): 0.7998
## G index (geometric average of eca, ecp, and ari): 0.8962
## no. of clusters in theoretical partition: 5
## no. of clusters in estimated partition: 7
## cluster accuracy (eca): 0.9986
## cluster purity (ecp): 0.9988
## adjusted Rand's index (ari): 0.9967
## G index (geometric average of eca, ecp, and ari): 0.998
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 3
## cluster accuracy (eca): 0.9983
## cluster purity (ecp): 0.7338
## adjusted Rand's index (ari): 0.2877
## G index (geometric average of eca, ecp, and ari): 0.5951
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 15
## cluster accuracy (eca): 0.9975
## cluster purity (ecp): 0.7828
## adjusted Rand's index (ari): 0.3667
## G index (geometric average of eca, ecp, and ari): 0.6591
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 11
## cluster accuracy (eca): 0.9918
## cluster purity (ecp): 0.8339
## adjusted Rand's index (ari): 0.4737
## G index (geometric average of eca, ecp, and ari): 0.7318
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.8388
## cluster purity (ecp): 1
## adjusted Rand's index (ari): 0
## G index (geometric average of eca, ecp, and ari): 0
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 1
## cluster accuracy (eca): 0.8792
## cluster purity (ecp): 0.8204
## adjusted Rand's index (ari): 0.2626
## G index (geometric average of eca, ecp, and ari): 0.5743
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 6
## cluster accuracy (eca): 0.9035
## cluster purity (ecp): 0.826
## adjusted Rand's index (ari): 0.3623
## G index (geometric average of eca, ecp, and ari): 0.6466
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 6
## cluster accuracy (eca): 0.8792
## cluster purity (ecp): 0.8204
## adjusted Rand's index (ari): 0.2626
## G index (geometric average of eca, ecp, and ari): 0.5743
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 6
## cluster accuracy (eca): 0.9326
## cluster purity (ecp): 0.6146
## adjusted Rand's index (ari): 0.1716
## G index (geometric average of eca, ecp, and ari): 0.4616
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 30
## cluster accuracy (eca): 0.9961
## cluster purity (ecp): 0.8624
## adjusted Rand's index (ari): 0.5615
## G index (geometric average of eca, ecp, and ari): 0.7842
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 7
## cluster accuracy (eca): 0.9961
## cluster purity (ecp): 0.8616
## adjusted Rand's index (ari): 0.5564
## G index (geometric average of eca, ecp, and ari): 0.7817
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 7
## cluster accuracy (eca): 0.9975
## cluster purity (ecp): 0.8341
## adjusted Rand's index (ari): 0.464
## G index (geometric average of eca, ecp, and ari): 0.7281
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.9972
## cluster purity (ecp): 0.9064
## adjusted Rand's index (ari): 0.7032
## G index (geometric average of eca, ecp, and ari): 0.8598
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 6
## cluster accuracy (eca): 0.997
## cluster purity (ecp): 0.7973
## adjusted Rand's index (ari): 0.3923
## G index (geometric average of eca, ecp, and ari): 0.6781
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 10
## cluster accuracy (eca): 0.9968
## cluster purity (ecp): 0.7695
## adjusted Rand's index (ari): 0.3452
## G index (geometric average of eca, ecp, and ari): 0.6421
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 12
## cluster accuracy (eca): 0.9968
## cluster purity (ecp): 0.7784
## adjusted Rand's index (ari): 0.3518
## G index (geometric average of eca, ecp, and ari): 0.6487
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 11
## cluster accuracy (eca): 0.9967
## cluster purity (ecp): 0.7935
## adjusted Rand's index (ari): 0.3829
## G index (geometric average of eca, ecp, and ari): 0.6716
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 10
## cluster accuracy (eca): 0.9578
## cluster purity (ecp): 0.8545
## adjusted Rand's index (ari): 0.5415
## G index (geometric average of eca, ecp, and ari): 0.7624
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.8716
## cluster purity (ecp): 0.856
## adjusted Rand's index (ari): 0.5448
## G index (geometric average of eca, ecp, and ari): 0.7408
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.8533
## cluster purity (ecp): 0.8901
## adjusted Rand's index (ari): 0.6838
## G index (geometric average of eca, ecp, and ari): 0.8038
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 7
## cluster accuracy (eca): 0.9891
## cluster purity (ecp): 0.5296
## adjusted Rand's index (ari): 0.0943
## G index (geometric average of eca, ecp, and ari): 0.3669
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 50
## cluster accuracy (eca): 0.9985
## cluster purity (ecp): 0.7224
## adjusted Rand's index (ari): 0.2874
## G index (geometric average of eca, ecp, and ari): 0.5918
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 17
## cluster accuracy (eca): 0.9994
## cluster purity (ecp): 0.8615
## adjusted Rand's index (ari): 0.5808
## G index (geometric average of eca, ecp, and ari): 0.7938
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.9991
## cluster purity (ecp): 0.9187
## adjusted Rand's index (ari): 0.7194
## G index (geometric average of eca, ecp, and ari): 0.8708
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 5
## cluster accuracy (eca): 0.9965
## cluster purity (ecp): 0.9393
## adjusted Rand's index (ari): 0.8016
## G index (geometric average of eca, ecp, and ari): 0.9087
## no. of clusters in theoretical partition: 3
## no. of clusters in estimated partition: 5
## cluster accuracy (eca): 0.7685
## cluster purity (ecp): 0.8688
## adjusted Rand's index (ari): 0.337
## G index (geometric average of eca, ecp, and ari): 0.6082
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 4
## cluster accuracy (eca): 0.8033
## cluster purity (ecp): 0.8719
## adjusted Rand's index (ari): 0.4567
## G index (geometric average of eca, ecp, and ari): 0.6839
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 5
## cluster accuracy (eca): 0.8291
## cluster purity (ecp): 0.9185
## adjusted Rand's index (ari): 0.5029
## G index (geometric average of eca, ecp, and ari): 0.7262
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 4
## cluster accuracy (eca): 0.8425
## cluster purity (ecp): 0.6821
## adjusted Rand's index (ari): 0.3659
## G index (geometric average of eca, ecp, and ari): 0.5947
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 15
## cluster accuracy (eca): 0.6547
## cluster purity (ecp): 0.8763
## adjusted Rand's index (ari): 0.051
## G index (geometric average of eca, ecp, and ari): 0.3081
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 2
## cluster accuracy (eca): 0.7208
## cluster purity (ecp): 0.8634
## adjusted Rand's index (ari): 0.5072
## G index (geometric average of eca, ecp, and ari): 0.6809
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 7
## cluster accuracy (eca): 0.8067
## cluster purity (ecp): 0.8795
## adjusted Rand's index (ari): 0.5029
## G index (geometric average of eca, ecp, and ari): 0.7093
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 5
## cluster accuracy (eca): 0.7208
## cluster purity (ecp): 0.8634
## adjusted Rand's index (ari): 0.5072
## G index (geometric average of eca, ecp, and ari): 0.6809
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 7
## cluster accuracy (eca): 0.7312
## cluster purity (ecp): 0.7305
## adjusted Rand's index (ari): 0.2253
## G index (geometric average of eca, ecp, and ari): 0.4937
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 11
## cluster accuracy (eca): 0.9749
## cluster purity (ecp): 0.9546
## adjusted Rand's index (ari): 0.86
## G index (geometric average of eca, ecp, and ari): 0.9284
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.9514
## cluster purity (ecp): 0.9172
## adjusted Rand's index (ari): 0.7341
## G index (geometric average of eca, ecp, and ari): 0.862
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.9631
## cluster purity (ecp): 0.9318
## adjusted Rand's index (ari): 0.7623
## G index (geometric average of eca, ecp, and ari): 0.8812
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.8659
## cluster purity (ecp): 0.9802
## adjusted Rand's index (ari): 0.678
## G index (geometric average of eca, ecp, and ari): 0.8318
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 4
## cluster accuracy (eca): 0.9732
## cluster purity (ecp): 0.9348
## adjusted Rand's index (ari): 0.7859
## G index (geometric average of eca, ecp, and ari): 0.8942
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 9
## cluster accuracy (eca): 0.9576
## cluster purity (ecp): 0.9056
## adjusted Rand's index (ari): 0.6885
## G index (geometric average of eca, ecp, and ari): 0.8421
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 9
## cluster accuracy (eca): 0.9702
## cluster purity (ecp): 0.8676
## adjusted Rand's index (ari): 0.6395
## G index (geometric average of eca, ecp, and ari): 0.8135
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 11
## cluster accuracy (eca): 0.8728
## cluster purity (ecp): 0.84
## adjusted Rand's index (ari): 0.5191
## G index (geometric average of eca, ecp, and ari): 0.7247
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 8
## cluster accuracy (eca): 0.6101
## cluster purity (ecp): 0.8526
## adjusted Rand's index (ari): 0.1995
## G index (geometric average of eca, ecp, and ari): 0.4699
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 4
## cluster accuracy (eca): 0.4983
## cluster purity (ecp): 0.948
## adjusted Rand's index (ari): 0.1819
## G index (geometric average of eca, ecp, and ari): 0.4413
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 4
## cluster accuracy (eca): 0.3641
## cluster purity (ecp): 0.9804
## adjusted Rand's index (ari): 0.002
## G index (geometric average of eca, ecp, and ari): 0.0892
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 3
## cluster accuracy (eca): 0.8285
## cluster purity (ecp): 0.8513
## adjusted Rand's index (ari): 0.5455
## G index (geometric average of eca, ecp, and ari): 0.7273
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 7
## cluster accuracy (eca): 0.7376
## cluster purity (ecp): 0.5934
## adjusted Rand's index (ari): 0.1606
## G index (geometric average of eca, ecp, and ari): 0.4127
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 12
## cluster accuracy (eca): 0.809
## cluster purity (ecp): 0.9877
## adjusted Rand's index (ari): 0.4364
## G index (geometric average of eca, ecp, and ari): 0.7039
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 3
## cluster accuracy (eca): 0.7293
## cluster purity (ecp): 0.9037
## adjusted Rand's index (ari): 0.2309
## G index (geometric average of eca, ecp, and ari): 0.5339
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 3
## cluster accuracy (eca): 0.7898
## cluster purity (ecp): 0.9429
## adjusted Rand's index (ari): 0.2907
## G index (geometric average of eca, ecp, and ari): 0.6005
## no. of clusters in theoretical partition: 7
## no. of clusters in estimated partition: 3
| where | what | eca | ecp | ari | G | noOfClust_Th | noOfClust_Est |
|---|---|---|---|---|---|---|---|
| sc_10x_5cl | AWST protocol | 0.9861 | 0.9806 | 0.9632 | 0.9766 | 5 | 5 |
| sc_10x_5cl | RaceID + counts | 0.9767 | 0.8551 | 0.5137 | 0.7542 | 5 | 12 |
| sc_10x_5cl | RaceID + AWST | 0.9853 | 0.8087 | 0.4089 | 0.6881 | 5 | 17 |
| sc_10x_5cl | RaceID + VST | 0.9610 | 0.9081 | 0.6685 | 0.8356 | 5 | 8 |
| sc_10x_5cl | RaceID + Townes | 0.8172 | 0.9366 | 0.1493 | 0.4852 | 5 | 2 |
| sc_10x_5cl | RCA + counts | 0.9161 | 0.8042 | 0.4973 | 0.7155 | 5 | 19 |
| sc_10x_5cl | RCA + AWST | 0.9044 | 0.7996 | 0.4613 | 0.6936 | 5 | 20 |
| sc_10x_5cl | RCA + VST | 0.9161 | 0.8042 | 0.4973 | 0.7155 | 5 | 19 |
| sc_10x_5cl | RCA + Townes | 0.9280 | 0.5664 | 0.1112 | 0.3881 | 5 | 109 |
| sc_10x_5cl | Seurat (LoRes) + counts | 0.9862 | 0.8837 | 0.6000 | 0.8056 | 5 | 12 |
| sc_10x_5cl | Seurat (LoRes) + AWST | 0.9845 | 0.8366 | 0.4787 | 0.7333 | 5 | 15 |
| sc_10x_5cl | Seurat (LoRes) + VST | 0.9869 | 0.8854 | 0.5757 | 0.7953 | 5 | 12 |
| sc_10x_5cl | Seurat (LoRes) + Townes | 0.9851 | 0.8765 | 0.6074 | 0.8064 | 5 | 12 |
| sc_10x_5cl | Seurat (HiRes) + counts | 0.9837 | 0.7908 | 0.3455 | 0.6453 | 5 | 19 |
| sc_10x_5cl | Seurat (HiRes) + AWST | 0.9848 | 0.7871 | 0.3394 | 0.6408 | 5 | 20 |
| sc_10x_5cl | Seurat (HiRes) + VST | 0.9846 | 0.7921 | 0.3488 | 0.6480 | 5 | 19 |
| sc_10x_5cl | Seurat (HiRes) + Townes | 0.9833 | 0.8208 | 0.4275 | 0.7014 | 5 | 16 |
| sc_10x_5cl | SC3 + counts | 0.9811 | 0.8702 | 0.6620 | 0.8268 | 5 | 16 |
| sc_10x_5cl | SC3 + AWST | 0.9861 | 0.8265 | 0.4292 | 0.7046 | 5 | 20 |
| sc_10x_5cl | SC3 + VST | 0.9843 | 0.8792 | 0.6743 | 0.8356 | 5 | 15 |
| sc_10x_5cl | SC3 + Townes | 0.9685 | 0.6270 | 0.1299 | 0.4289 | 5 | 76 |
| sc_10x_5cl | clustExp + counts | 0.9809 | 0.7198 | 0.2680 | 0.5741 | 5 | 33 |
| sc_10x_5cl | clustExp + AWST | 0.9881 | 0.8742 | 0.5816 | 0.7950 | 5 | 14 |
| sc_10x_5cl | clustExp + VST | 0.9722 | 0.8234 | 0.4267 | 0.6990 | 5 | 15 |
| sc_10x_5cl | clustExp + Townes | 0.9737 | 0.9243 | 0.7998 | 0.8962 | 5 | 7 |
| sc_10x | AWST protocol | 0.9986 | 0.9988 | 0.9967 | 0.9980 | 3 | 3 |
| sc_10x | RaceID + counts | 0.9983 | 0.7338 | 0.2877 | 0.5951 | 3 | 15 |
| sc_10x | RaceID + AWST | 0.9975 | 0.7828 | 0.3667 | 0.6591 | 3 | 11 |
| sc_10x | RaceID + VST | 0.9918 | 0.8339 | 0.4737 | 0.7318 | 3 | 8 |
| sc_10x | RaceID + Townes | 0.8388 | 1.0000 | 0.0000 | 0.0000 | 3 | 1 |
| sc_10x | RCA + counts | 0.8792 | 0.8204 | 0.2626 | 0.5743 | 3 | 6 |
| sc_10x | RCA + AWST | 0.9035 | 0.8260 | 0.3623 | 0.6466 | 3 | 6 |
| sc_10x | RCA + VST | 0.8792 | 0.8204 | 0.2626 | 0.5743 | 3 | 6 |
| sc_10x | RCA + Townes | 0.9326 | 0.6146 | 0.1716 | 0.4616 | 3 | 30 |
| sc_10x | Seurat (LoRes) + counts | 0.9961 | 0.8624 | 0.5615 | 0.7842 | 3 | 7 |
| sc_10x | Seurat (LoRes) + AWST | 0.9961 | 0.8616 | 0.5564 | 0.7817 | 3 | 7 |
| sc_10x | Seurat (LoRes) + VST | 0.9975 | 0.8341 | 0.4640 | 0.7281 | 3 | 8 |
| sc_10x | Seurat (LoRes) + Townes | 0.9972 | 0.9064 | 0.7032 | 0.8598 | 3 | 6 |
| sc_10x | Seurat (HiRes) + counts | 0.9970 | 0.7973 | 0.3923 | 0.6781 | 3 | 10 |
| sc_10x | Seurat (HiRes) + AWST | 0.9968 | 0.7695 | 0.3452 | 0.6421 | 3 | 12 |
| sc_10x | Seurat (HiRes) + VST | 0.9968 | 0.7784 | 0.3518 | 0.6487 | 3 | 11 |
| sc_10x | Seurat (HiRes) + Townes | 0.9967 | 0.7935 | 0.3829 | 0.6716 | 3 | 10 |
| sc_10x | SC3 + counts | 0.9578 | 0.8545 | 0.5415 | 0.7624 | 3 | 8 |
| sc_10x | SC3 + AWST | 0.8716 | 0.8560 | 0.5448 | 0.7408 | 3 | 8 |
| sc_10x | SC3 + VST | 0.8533 | 0.8901 | 0.6838 | 0.8038 | 3 | 7 |
| sc_10x | SC3 + Townes | 0.9891 | 0.5296 | 0.0943 | 0.3669 | 3 | 50 |
| sc_10x | clustExp + counts | 0.9985 | 0.7224 | 0.2874 | 0.5918 | 3 | 17 |
| sc_10x | clustExp + AWST | 0.9994 | 0.8615 | 0.5808 | 0.7938 | 3 | 8 |
| sc_10x | clustExp + VST | 0.9991 | 0.9187 | 0.7194 | 0.8708 | 3 | 5 |
| sc_10x | clustExp + Townes | 0.9965 | 0.9393 | 0.8016 | 0.9087 | 3 | 5 |
| RNAmix_celseq2 | AWST protocol | 0.7685 | 0.8688 | 0.3370 | 0.6082 | 7 | 4 |
| RNAmix_celseq2 | RaceID + counts | 0.8033 | 0.8719 | 0.4567 | 0.6839 | 7 | 5 |
| RNAmix_celseq2 | RaceID + AWST | 0.8291 | 0.9185 | 0.5029 | 0.7262 | 7 | 4 |
| RNAmix_celseq2 | RaceID + VST | 0.8425 | 0.6821 | 0.3659 | 0.5947 | 7 | 15 |
| RNAmix_celseq2 | RaceID + Townes | 0.6547 | 0.8763 | 0.0510 | 0.3081 | 7 | 2 |
| RNAmix_celseq2 | RCA + counts | 0.7208 | 0.8634 | 0.5072 | 0.6809 | 7 | 7 |
| RNAmix_celseq2 | RCA + AWST | 0.8067 | 0.8795 | 0.5029 | 0.7093 | 7 | 5 |
| RNAmix_celseq2 | RCA + VST | 0.7208 | 0.8634 | 0.5072 | 0.6809 | 7 | 7 |
| RNAmix_celseq2 | RCA + Townes | 0.7312 | 0.7305 | 0.2253 | 0.4937 | 7 | 11 |
| RNAmix_celseq2 | Seurat (LoRes) + counts | 0.9749 | 0.9546 | 0.8600 | 0.9284 | 7 | 8 |
| RNAmix_celseq2 | Seurat (LoRes) + AWST | 0.9514 | 0.9172 | 0.7341 | 0.8620 | 7 | 8 |
| RNAmix_celseq2 | Seurat (LoRes) + VST | 0.9631 | 0.9318 | 0.7623 | 0.8812 | 7 | 8 |
| RNAmix_celseq2 | Seurat (LoRes) + Townes | 0.8659 | 0.9802 | 0.6780 | 0.8318 | 7 | 4 |
| RNAmix_celseq2 | Seurat (HiRes) + counts | 0.9732 | 0.9348 | 0.7859 | 0.8942 | 7 | 9 |
| RNAmix_celseq2 | Seurat (HiRes) + AWST | 0.9576 | 0.9056 | 0.6885 | 0.8421 | 7 | 9 |
| RNAmix_celseq2 | Seurat (HiRes) + VST | 0.9702 | 0.8676 | 0.6395 | 0.8135 | 7 | 11 |
| RNAmix_celseq2 | Seurat (HiRes) + Townes | 0.8728 | 0.8400 | 0.5191 | 0.7247 | 7 | 8 |
| RNAmix_celseq2 | SC3 + counts | 0.6101 | 0.8526 | 0.1995 | 0.4699 | 7 | 4 |
| RNAmix_celseq2 | SC3 + AWST | 0.4983 | 0.9480 | 0.1819 | 0.4413 | 7 | 4 |
| RNAmix_celseq2 | SC3 + VST | 0.3641 | 0.9804 | 0.0020 | 0.0892 | 7 | 3 |
| RNAmix_celseq2 | SC3 + Townes | 0.8285 | 0.8513 | 0.5455 | 0.7273 | 7 | 7 |
| RNAmix_celseq2 | clustExp + counts | 0.7376 | 0.5934 | 0.1606 | 0.4127 | 7 | 12 |
| RNAmix_celseq2 | clustExp + AWST | 0.8090 | 0.9877 | 0.4364 | 0.7039 | 7 | 3 |
| RNAmix_celseq2 | clustExp + VST | 0.7293 | 0.9037 | 0.2309 | 0.5339 | 7 | 3 |
| RNAmix_celseq2 | clustExp + Townes | 0.7898 | 0.9429 | 0.2907 | 0.6005 | 7 | 3 |
# ---- Summary table and final performance plot ----
# Reload the saved results table from disk so that this section is
# self-contained: it does not depend on objects left in the workspace
# (presumably the file was written by the benchmark part of this script;
# verify if the job name changes).
setwd("~/Dropbox/AWST/mixology/")
jobName <- "mixoloy20200925"
ttable <- read.csv(paste0(jobName, "_results.tsv"), sep = "\t",
                   stringsAsFactors = FALSE)

# LaTeX version of the full results table.
library(xtable)
x.table <- xtable(ttable)
#print(xtable(ttable), include.rownames = FALSE)

save_png <- FALSE

# `method` will hold the pre-processing applied before each clustering tool.
ttable$method <- NA

# The stand-alone "AWST protocol" rows are drawn as individual points, not as
# part of the boxes. Logical masks are used instead of x[-grep(...), ] because
# a negative index built from an empty grep() result would drop every row.
is_protocol <- grepl("protocol", ttable$what)
awst_protocol <- ttable[is_protocol, ]
ttable <- ttable[!is_protocol, ]

# The low-resolution Seurat runs are left out of the comparison.
ttable <- ttable[!grepl("LoRes", ttable$what), ]

# Label every remaining row with the pre-processing named in `what`.
for (pre_proc in c("AWST", "VST", "counts", "Townes")) {
  ttable$method[grepl(pre_proc, ttable$what)] <- pre_proc
}

if (save_png) {
  # NOTE(review): `res` is documented as a resolution in ppi; 1/300 looks like
  # it should be 300 (with width/height rescaled accordingly) -- confirm
  # before enabling save_png.
  png(filename = paste0(jobName, "_final_performance.png"),
      width = 600, height = 350, res = 1/300)
}

boxplot(ttable$G ~ ttable$method, ylim = c(0, 1),
        xlab = "pre-processing methods",
        ylab = "performance (G index)")#, border = "gray50")

# One dot per AWST-protocol result, placed just to the right of the first box.
points(rep(1.13, nrow(awst_protocol)), awst_protocol$G, pch = 20)
#text(rep(1.13, 3), awst_protocol$G + c(0.015, -0.055, -0.02), awst_protocol$where, pos = 4)
#text(rep(1.13, 3), awst_protocol$G + c(0.015, -0.055, -0.02), awst_protocol$what, pos = 2)

# Close the PNG device (only opened when save_png is TRUE) so the file is
# actually written to disk.
if (save_png) {
  dev.off()
}

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=it_IT.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=it_IT.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] clues_0.6.2.2 dendextend_1.13.4 steFunctions_2019.04.29
## [4] knitr_1.28
##
## loaded via a namespace (and not attached):
## [1] umap_0.2.5.0 Rcpp_1.0.3 RSpectra_0.16-0 highr_0.8
## [5] compiler_3.6.3 pillar_1.4.3 viridis_0.5.1 iterators_1.0.12
## [9] tools_3.6.3 digest_0.6.25 jsonlite_1.6.1 lattice_0.20-41
## [13] evaluate_0.14 lifecycle_0.2.0 tibble_2.1.3 gtable_0.3.0
## [17] viridisLite_0.3.0 pkgconfig_2.0.3 rlang_0.4.5 Matrix_1.2-18
## [21] foreach_1.4.8 rstudioapi_0.11 yaml_2.2.1 parallel_3.6.3
## [25] xfun_0.12 gridExtra_2.3 stringr_1.4.0 dplyr_0.8.5
## [29] askpass_1.1 grid_3.6.3 tidyselect_1.0.0 reticulate_1.14
## [33] glue_1.3.2 R6_2.4.1 rmarkdown_2.1 ggplot2_3.3.0
## [37] purrr_0.3.3 magrittr_1.5 scales_1.1.0 codetools_0.2-16
## [41] htmltools_0.4.0 assertthat_0.2.1 colorspace_1.4-1 stringi_1.4.6
## [45] openssl_1.4.1 doParallel_1.0.15 munsell_0.5.0 crayon_1.3.4